# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)
library(sandwich)


# === produce left panels: Figure 7(a), (c), (e)
rm(list = ls())

# monthly style portfolios
data = readRDS('input_data/morningstar_style_data.RDS')[, list(yyyymm, category, rating, flow, ret)]
data = data[(yyyymm >= 200201) & (yyyymm <= 200212)] # focus on 12 month window
data = data.table(melt(data, id.vars = c('yyyymm','category')))
names(data)[3] = 'var'

# demean by period to focus on cross-sectional dispersion
data = merge(data, data[, list(m = mean(value)), list(yyyymm, var)], by = c('yyyymm','var'))
data[, value := value - m]
data[, m := NULL]

# sort by predicted style-level rating change using Dec 2001 data
tmp = readRDS('input_data/predicted_style_rating_change_dec_2001.RDS')
tmp = tmp[order(-predicted_rating_change)]
tmp[, rank := 1:9]
tmp[, predicted_rating_change := NULL]
data = merge(data, tmp, by = 'category'); rm(tmp)

# enrich for plotting purposes
data[, date := as.Date(as.character(100*yyyymm+1), '%Y%m%d')]
data = data[rank %in% c(1,2,8,9)]
data[, lab := ifelse(rank == 9, 'Most negatively affected',ifelse(rank == 8, 'Second neg',ifelse(rank == 1, 'Most positively affected', 'Second pos')))]
data_bk = copy(data)
event_date = as.Date('2002-06-01')

# output: Figure 7(a): ratings
data = copy(data_bk[var == 'rating'])
ggplot(data, aes(x = date, y = value, color = reorder(lab, -rank))) + geom_line(lwd = 1) + theme_classic() + 
  labs(x = NULL, y = 'Demeand rating') + theme(legend.position = c(.75, .8), legend.title = element_blank()) + 
  geom_vline(xintercept = event_date, lty = 3) + coord_cartesian(ylim = c(-.8, .8))

# output: Figure 7(c), cumulative flow
data = copy(data_bk[var == 'flow'])
for (this in c(1,2,8,9)){data[rank == this, value := cumsum(value)]}
rm(this)
ggplot(data, aes(x = date, y = value, color = reorder(lab, -rank))) + geom_line(lwd = 1) + theme_classic() + 
  labs(x = NULL, y = 'Cumulative fund flow') + theme(legend.position = c(.75, .9), legend.title = element_blank()) + 
  geom_vline(xintercept = event_date, lty = 3) + coord_cartesian(ylim = c(-.15, .23)) + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

# output: Figure 7(e), cumulative returns, 2002
data = copy(data_bk[var == 'ret'])
for (this in c(1,2,8,9)){data[rank == this, value := cumsum(log(1+value))]}
rm(this)
ggplot(data, aes(x = date, y = value, color = reorder(lab, -rank))) + geom_line(lwd = 1) + theme_classic() + 
  labs(x = NULL, y = 'Cumulative log return') + theme(legend.position = c(.75, .9), legend.title = element_blank()) + 
  geom_vline(xintercept = event_date, lty = 3) + coord_cartesian(ylim = c(-.15, .23)) + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))


# === produce right panels: Figure 7(b), (d), (f)
rm(list = ls())

data = readRDS('input_data/morningstar_style_data.RDS')[, list(yyyymm, category, ret = ret, flow = flow, rating = rating_1)]
data = data.table(melt(data, id.vars = c('yyyymm','category')))
names(data)[3] = 'var'

# take out cross-sectional averages
data = merge(data, data[, list(m = mean(value)), list(yyyymm, var)], by = c('yyyymm','var'))
data[, value := value - m]
data[, m := NULL]

# compute change before vs after June
data[, yyyy := floor(yyyymm/100)]
data[, mm := yyyymm - 100*yyyy]
data[, post := ifelse(mm %in% 7:12, 2, 1)]
data = data[, list(value = mean(value)), list(yyyy, var, category, post)]
data = merge(data[post == 2, list(yyyy, var, category, value_post = value)], 
             data[post == 1, list(yyyy, var, category, value_pre = value)], by = c('yyyy','var','category'))
data = data[, list(yyyy, var, category, value = value_post - value_pre)]

# rank in each year based on previous december data
tmp = readRDS('input_data/predicted_style_rating_change_dec_all_years.RDS')
for (this in unique(tmp[, yyyy])){
  tmp[yyyy == this, rank := 1:9]
}
tmp[, predChg := NULL]
data = merge(data, tmp, by = c('yyyy','category')); rm(tmp)
data[, category := NULL]

data = rbind(data[yyyy == 2002, list(type = '2002', var, rank, value, se = NA)],
             data[yyyy != 2002, list(type = 'Other years', value = mean(value), se = sd(value)/sqrt(length(yyyy))), list(var, rank)])
data = data[order(var, type, rank)]
data_bk = copy(data)


# = output: Figure 7(b), ratings change
data = copy(data_bk)[var == 'rating']
ggplot(data, aes(x = rank, y = value, fill = type)) + geom_bar(stat = 'identity', position = 'dodge') + 
  geom_errorbar(aes(ymin = value - 1.96*se, ymax = value + 1.96*se), position = 'dodge')  + theme_classic() + 
  theme(legend.title = element_blank(), legend.position = c(.7, .9)) + 
  scale_x_discrete(limits = factor(1:9), labels = c('Pos',2:8,'Neg'), name = 'Styles, ranked by predicted rating change') + 
  scale_y_continuous('Average rating change')

# = output: Figure 7(d), flow change
data = copy(data_bk)[var == 'flow']
ggplot(data, aes(x = rank, y = value, fill = type)) + geom_bar(stat = 'identity', position = 'dodge') + 
  geom_errorbar(aes(ymin = value - 1.96*se, ymax = value + 1.96*se), position = 'dodge')  + theme_classic() + 
  theme(legend.title = element_blank(), legend.position = c(.7, .9)) + 
  scale_x_discrete(limits = factor(1:9), labels = c('Pos',2:8,'Neg'), name = 'Styles, ranked by predicted rating change') + 
  scale_y_continuous('Monthly flow change', labels = scales::percent_format(accuracy = 0.1)) + 
  coord_cartesian(ylim = c(-.03, .015))


# = output: Figure 7(f), return change
data = copy(data_bk)[var == 'ret']
ggplot(data, aes(x = rank, y = value, fill = type)) + geom_bar(stat = 'identity', position = 'dodge') + 
  geom_errorbar(aes(ymin = value - 1.96*se, ymax = value + 1.96*se), position = 'dodge')  + theme_classic() + 
  theme(legend.title = element_blank(), legend.position = c(.7, .9)) + 
  scale_x_discrete(limits = factor(1:9), labels = c('Pos',2:8,'Neg'), name = 'Styles, ranked by predicted rating change') + 
  scale_y_continuous('Monthly return change', labels = scales::percent_format(accuracy = 1)) + 
  coord_cartesian(ylim = c(-.04, .03))


